Importing the Libraries:

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Reading the dataset

In [15]:
# Load the accident records; 'accident.csv' is resolved relative to the current working directory
df = pd.read_csv('accident.csv')
df.head(10) # Preview the first 10 rows of the dataset
Out[15]:
Accident_ID State Date Time Reason Number_of_Deaths Number_of_Injuries Road_Type Weather_Conditions Alcohol_Involved Driver_Fatigue Road_Conditions Speed_Limit
0 1001 Andhra Pradesh 01-05-2021 15:30:00 Speeding 2 3 Rural Sunny No No Poor 60.0
1 1002 Karnataka 02-05-2021 18:45:00 Drunk Driving 1 4 Urban Rainy Yes No Good 40.0
2 1003 Delhi 03-05-2021 10:15:00 Poor Road Conditions 0 2 Urban Foggy No No Poor 50.0
3 1004 Maharashtra 04-05-2021 06:00:00 Driver Fatigue 3 6 Rural Sunny No Yes Good 70.0
4 1005 Uttar Pradesh 05-05-2021 21:20:00 Speeding 1 2 Rural Sunny No No Fair 80.0
5 1006 Tamil Nadu 06-05-2021 12:00:00 Drunk Driving 0 1 Urban Sunny Yes No Good 30.0
6 1007 Rajasthan 07-05-2021 14:30:00 Poor Road Conditions 2 5 Rural Rainy No No Poor 60.0
7 1008 West Bengal 08-05-2021 08:00:00 Driver Fatigue 1 3 Urban Sunny No Yes Fair 50.0
8 1009 Gujarat 09-05-2021 17:30:00 Speeding 0 4 Rural Sunny No No Good 60.0
9 1010 Assam 10-05-2021 11:45:00 Poor Road Conditions 1 2 Urban Foggy No No Poor 40.0

Checking the size of the dataset

In [16]:
# (number of rows, number of columns) of the loaded DataFrame
df.shape
Out[16]:
(300, 13)

Inspecting the columns and their data types

In [17]:
# Print each column's non-null count and dtype, plus the frame's memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Accident_ID         300 non-null    int64  
 1   State               300 non-null    object 
 2   Date                300 non-null    object 
 3   Time                300 non-null    object 
 4   Reason              300 non-null    object 
 5   Number_of_Deaths    300 non-null    int64  
 6   Number_of_Injuries  300 non-null    int64  
 7   Road_Type           300 non-null    object 
 8   Weather_Conditions  300 non-null    object 
 9   Alcohol_Involved    300 non-null    object 
 10  Driver_Fatigue      300 non-null    object 
 11  Road_Conditions     300 non-null    object 
 12  Speed_Limit         299 non-null    float64
dtypes: float64(1), int64(3), object(9)
memory usage: 30.6+ KB

Checking for Null values

In [18]:
# Count the missing entries in every column
df.isna().sum()
Out[18]:
Accident_ID           0
State                 0
Date                  0
Time                  0
Reason                0
Number_of_Deaths      0
Number_of_Injuries    0
Road_Type             0
Weather_Conditions    0
Alcohol_Involved      0
Driver_Fatigue        0
Road_Conditions       0
Speed_Limit           1
dtype: int64

Dealing with missing values

In [19]:
# Speed_Limit contains 1 null value; inspect the column's summary statistics
# before choosing a value to fill it with
df['Speed_Limit'].describe()
Out[19]:
count    299.000000
mean      56.956522
std       13.047466
min       30.000000
25%       50.000000
50%       60.000000
75%       60.000000
max       90.000000
Name: Speed_Limit, dtype: float64
In [20]:
# Impute the missing Speed_Limit with the column median.
# The median is computed from the data instead of being hard-coded, so the cell
# stays correct if the CSV changes (describe() reports 60.0 for the current file).
Speed_Limit_median = df['Speed_Limit'].median()

# fillna() returns a new Series; it must be assigned back, otherwise df keeps the
# NaN and the affected row is silently dropped by later groupby('Speed_Limit') calls.
df['Speed_Limit'] = df['Speed_Limit'].fillna(Speed_Limit_median)

# Display the filled column
df['Speed_Limit']
Out[20]:
0      60.0
1      40.0
2      50.0
3      70.0
4      80.0
       ... 
295    70.0
296    50.0
297    40.0
298    60.0
299    70.0
Name: Speed_Limit, Length: 300, dtype: float64
In [21]:
# Number of distinct values in each column
df.nunique()
Out[21]:
Accident_ID           300
State                  27
Date                  210
Time                  127
Reason                  7
Number_of_Deaths        6
Number_of_Injuries      7
Road_Type               2
Weather_Conditions      5
Alcohol_Involved        2
Driver_Fatigue          2
Road_Conditions         3
Speed_Limit             7
dtype: int64

Reasons For Road Accidents

In [22]:
# Upper-case every 'Reason' value so the counts below are case-insensitive
df['Reason'] = df['Reason'].str.upper()

# Accidents per reason, most frequent first
reason_counts = df['Reason'].value_counts()

# One colour from seaborn's 'Spectral' palette per distinct reason
colors = sns.color_palette('Spectral', len(reason_counts))

# Draw the pie on an explicit 5x5 inch figure/axes pair
fig, ax = plt.subplots(figsize=(5, 5))
ax.pie(reason_counts, labels=reason_counts.index, colors=colors, autopct='%1.1f%%', startangle=90)
ax.set_title('Reasons for Accidents', fontsize=18)
ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()
No description has been provided for this image

Accident Analysis by State

In [23]:
# Accidents per state: count the Accident_ID entries inside each State group
state_accidents = df.groupby('State')['Accident_ID'].count().reset_index()

# Order the states from most to fewest accidents and keep the first 10
sorted_states = state_accidents.sort_values(by='Accident_ID', ascending=False)
top_states = sorted_states.head(10)

# One 'Spectral' colour per plotted state
palette = sns.color_palette("Spectral", len(top_states))

# Horizontal bar chart drawn on an explicit axes object
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(data=top_states, x='Accident_ID', y='State', hue='State',
            palette=palette, legend=False, ax=ax)
ax.set_title('Number of Accidents by State (Top 10)', fontsize=18)
ax.set_xlabel('Number of Accidents')
ax.set_ylabel('State')
plt.show()
No description has been provided for this image

Impact of Weather Condition on Road Accidents

In [24]:
# Accidents per weather condition: count the Accident_ID entries in each group
weather_accidents = df.groupby('Weather_Conditions')['Accident_ID'].count().reset_index()

# Order the conditions from fewest to most accidents (bars rise left to right)
sorted_weather = weather_accidents.sort_values(by='Accident_ID', ascending=True)

# Keep at most the first 10 rows of that ordering
top_weather = sorted_weather.head(10)

# One 'viridis' colour per plotted weather condition
palette = sns.color_palette("viridis", len(top_weather))

# Apply the seaborn style/context globally (this also affects later figures)
sns.set(style="whitegrid", context="talk")

# Vertical bar chart. hue is the categorical x variable (as in the state chart),
# so every bar gets its own palette colour even when two conditions have the
# same accident count; colouring by the count itself would merge tied bars.
fig, ax = plt.subplots(figsize=(8, 6))
barplot = sns.barplot(x='Weather_Conditions', y='Accident_ID', data=top_weather,
                      palette=palette, hue='Weather_Conditions', legend=False, ax=ax)

# Write each count just above its bar; bar positions follow the row order of top_weather
for index, value in enumerate(top_weather['Accident_ID']):
    ax.text(index, value, str(value), color='black', ha="center", va="bottom", fontsize=12)

# Titles and axis labels
ax.set_title('Number of Accidents by Weather Condition', fontsize=18)
ax.set_xlabel('Weather Condition', fontsize=14)
ax.set_ylabel('Number of Accidents', fontsize=14)
ax.tick_params(axis='x', labelrotation=45)  # rotate the x tick labels for readability
plt.show()
No description has been provided for this image

Impact of Speeding

In [25]:
# Mean number of deaths for each distinct speed limit
speed_stats = df.groupby('Speed_Limit', as_index=False)['Number_of_Deaths'].mean()

# Apply the seaborn style/context before the figure is created
sns.set(style="whitegrid", context="talk")

# Line plot of the per-speed-limit averages on an explicit axes object
fig, ax = plt.subplots(figsize=(7, 4))
sns.lineplot(data=speed_stats, x='Speed_Limit', y='Number_of_Deaths',
             marker='o', color='green', label='Average Number of Deaths', ax=ax)

# Title, axis labels and legend
ax.set_title('Impact of Speeding on Accident Severity', fontsize=18)
ax.set_xlabel('Speed Limit', fontsize=14)
ax.set_ylabel('Average Number of Deaths', fontsize=14)
ax.legend()
plt.show()
No description has been provided for this image

Alcohol-Involved Accidents (State-wise)

In [26]:
import plotly.express as px

# Keep only the rows where alcohol was involved
alcohol_accidents_df = df[df['Alcohol_Involved'] == 'Yes']

# Alcohol-related accidents per state as a two-column frame
state_counts = (
    alcohol_accidents_df['State']
    .value_counts()
    .rename_axis('State')
    .reset_index(name='Number_of_Accidents')
)

# Bar chart of the per-state counts, coloured by the count on the Inferno scale
fig = px.bar(
    state_counts,
    x='State',
    y='Number_of_Accidents',
    title='Alcohol-Related Accidents by State',
    labels={'Number_of_Accidents': 'Number of Accidents'},
    color='Number_of_Accidents',
    color_continuous_scale=px.colors.sequential.Inferno,
)

# Vertical, smaller x tick labels so the state names fit
fig.update_layout(xaxis_tickangle=-90, xaxis_tickfont=dict(size=8))

# Render the figure
fig.show()
In [27]:
# Classify each accident as 'Rural' or 'Urban' from its road type: values starting
# with 'R' become 'Rural', everything else (including missing values) 'Urban'.
# The vectorised .str accessor replaces a row-wise apply(lambda), which raises on
# missing values.
df['Location_Type'] = np.where(df['Road_Type'].str.startswith('R', na=False), 'Rural', 'Urban')

# Accidents per location type as a two-column frame
location_counts = df['Location_Type'].value_counts().reset_index()
location_counts.columns = ['Location_Type', 'Count']

# Slice colours (yellow, red); the second literal previously had a stray leading space
colors = ['#FDFF00', '#C21807']

# Donut chart (hole=0.6) of the share of accidents per location type
fig = px.pie(location_counts, values='Count', names='Location_Type',
             title='Accidents by Location Type', color_discrete_sequence=colors,
             hole=0.6, labels={'Count': 'Number of Accidents'})

# Thin black outline around every slice
fig.update_traces(marker=dict(line=dict(color='#000000', width=1)))

# Render the figure
fig.show()

Visualizing the Number of Accidents in 2021, 2022, and 2023

In [28]:
import plotly.graph_objects as go

# Parse 'Date' as day-first dates (the file stores them as dd-mm-yyyy)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)

# Calendar year and month of every accident
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month

# Number of accidents for every (year, month) pair
monthly_accidents = df.groupby(['Year', 'Month']).size().reset_index(name='Number_of_Accidents')

# Create a figure object
fig = go.Figure()

# One line per year, newest year first.
# Every trace starts visible so the initial view matches the dropdown's default
# entry ('All Years', active=0) and the initial title; previously only the first
# trace was shown while the menu already read 'All Years'.
years = sorted(monthly_accidents['Year'].unique(), reverse=True)
for year in years:
    yearly_data = monthly_accidents[monthly_accidents['Year'] == year]
    fig.add_trace(go.Scatter(
        x=yearly_data['Month'],
        y=yearly_data['Number_of_Accidents'],
        mode='lines+markers',
        name=str(year)
    ))

# First dropdown entry: show every year's trace
dropdown_buttons = [
    dict(label='All Years',
         method='update',
         args=[{'visible': [True] * len(years)},
               {'title': 'Monthly Accidents Over Years'}])
]

# One dropdown entry per year: show only that year's trace and retitle the figure
for j, year in enumerate(years):
    visibility = [j == k for k in range(len(years))]
    dropdown_buttons.append(
        dict(label=str(year),
             method='update',
             args=[{'visible': visibility},
                   {'title': f'Monthly Accidents in {year}'}])
    )

# Titles, axis labels and the dropdown placed above the top-left of the plot area
fig.update_layout(
    title='Monthly Accidents Over Years',
    xaxis_title='Month',
    yaxis_title='Number of Accidents',
    updatemenus=[dict(
        active=0,
        buttons=dropdown_buttons,
        x=0.1,
        y=1.15,
        xanchor='left',
        yanchor='top'
    )]
)

# Render the figure
fig.show()
In [29]:
import matplotlib.animation as animation
from IPython.display import HTML

# Per-month sums of deaths and injuries, with 'Month' kept as a regular column
casualty_columns = ['Number_of_Deaths', 'Number_of_Injuries']
monthly_totals = df.groupby('Month', as_index=False)[casualty_columns].sum()

# Combined casualties (deaths + injuries) for each month
monthly_totals['Total'] = monthly_totals[casualty_columns].sum(axis=1)
In [30]:
# The animation is converted to self-contained HTML/JS with to_jshtml(), which
# works with the default inline backend, so no backend-switching magic is needed.

# Set up the figure and axis
fig, ax = plt.subplots(figsize=(8, 4))

# Upper y-limit shared by every frame: the largest monthly total plus headroom
y_upper = monthly_totals['Total'].max() + 10


def _prepare_axes():
    """Clear the axes and re-apply the fixed limits, labels and title."""
    ax.clear()
    # Bars are centred on the month numbers 1..12; the half-unit margin keeps the
    # first and last bars from being cut off at the axis edge.
    ax.set_xlim(0.5, 12.5)
    ax.set_xticks(range(1, 13))
    ax.set_ylim(0, y_upper)
    ax.set_xlabel('Month')
    ax.set_ylabel('Total Number of Deaths and Injuries')
    ax.set_title('Total Number of Deaths and Injuries by Month')


def init():
    """Draw the empty, fully labelled axes shown before the first frame."""
    _prepare_axes()
    return []


def animate(i):
    """Draw frame ``i``: bars for the first ``i + 1`` rows of ``monthly_totals``.

    Returns the list of bar patches drawn for this frame.
    """
    _prepare_axes()
    bars = ax.bar(monthly_totals['Month'].iloc[:i+1], monthly_totals['Total'].iloc[:i+1], color='#FFA07A')
    return bars.patches


# One frame per row of monthly_totals, 500 ms apart, played once
ani = animation.FuncAnimation(fig, animate, init_func=init, frames=len(monthly_totals), interval=500, repeat=False)

# Render to HTML first, then close the figure so the inline backend does not
# also emit a static copy of it below the animation
animation_html = HTML(ani.to_jshtml())
plt.close(fig)
animation_html
Out[30]:
No description has been provided for this image
In [ ]: